In [1]:
%matplotlib inline
import copy
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import time
import os
from bokeh.io import output_notebook
from bokeh.models import ColumnDataSource, Legend
from bokeh.plotting import figure, show, output_file
# Configure Bokeh to render show() output inline in the notebook.
output_notebook()
Loading BokehJS ...
In [2]:
# Read the population list: one label per line, surrounding whitespace removed.
# The order of this list later determines each population's colour and marker.
with open('AllEurasia.poplist.txt') as f:
    lines = list(f)
all_eurasia = list(map(str.strip, lines))
In [3]:
# Per-sample metadata: whitespace-separated columns (sample id, gender, status),
# all coerced to strings. The 'status' column is matched against the population
# labels in all_eurasia below.
ind_df = pd.read_csv(
    'HumanOrigins_FennoScandian_small.ind',
    names=['sample_id', 'gender', 'status'],
    # Raw string: a bare "\s" in a normal literal is an invalid escape sequence.
    sep=r"\s+",
).astype({'sample_id': 'str', 'gender': 'str', 'status': 'str'})

# Status label of every sample whose label appears in all_eurasia, in file order.
# Single .loc lookup instead of chained indexing.
status_arr = ind_df.loc[ind_df['status'].isin(all_eurasia), 'status'].to_numpy()
In [4]:
def visualize(file_name, name=None):
    """Plot a saved embedding as an interactive Bokeh scatter, one glyph series per population.

    Loads a NumPy array from ``file_name`` and plots column 0 against column 1.
    Points are grouped using the module-level ``all_eurasia`` (population labels)
    and ``status_arr`` (per-sample label); each population gets its own
    marker/colour combination and its own legend entry. The figure is written to
    ``htmls/<name>.html`` and shown.

    Assumes the rows of the loaded array are aligned with ``status_arr`` -- the
    code indexes the array with positions taken from ``status_arr``; confirm
    against whatever produced the ``.npy`` files.

    Parameters
    ----------
    file_name : str
        Path to a ``.npy`` file holding an array with at least two columns.
    name : str, optional
        Plot title and stem of the output HTML file. Defaults to the base name
        of ``file_name`` without its final extension.
    """
    with open(file_name, 'rb') as f:
        pcs = np.load(f)
    if name is None:
        # Strip the directory and only the last extension, so a stem such as
        # 'x_md0.5.npy' becomes 'x_md0.5'. Unlike splitting on '.', this also
        # yields a usable name for files without any extension.
        name = os.path.splitext(os.path.basename(file_name))[0]

    colors = [u'#1f77b4', u'#ff7f0e', u'#2ca02c', u'#d62728', u'#9467bd',
              u'#8c564b', u'#e377c2', u'#7f7f7f', u'#bcbd22', u'#17becf']
    markers = ['circle', 'square', 'triangle', 'plus', 'star', 'hex', 'hex_dot',
               'cross', 'x', 'y', 'diamond_dot', 'diamond', 'circle_dot', 'square_dot']

    TOOLTIPS = [
        ("index", "$index"),
        ("(x,y)", "($x, $y)"),
        ("label", "@label"),
    ]
    # NOTE(review): newer Bokeh releases spell these arguments `width`/`height`;
    # confirm against the installed Bokeh version before upgrading.
    p = figure(plot_width=1500, plot_height=800, tooltips=TOOLTIPS)
    p.title.text = name

    # One scatter renderer per population. Markers cycle fastest; the colour
    # advances each time the marker list is exhausted and wraps around so that
    # more than len(colors) * len(markers) populations cannot raise IndexError.
    items = []
    for idx, pop in enumerate(all_eurasia):
        ind = np.where(status_arr == pop)[0]
        source = ColumnDataSource(
            data={'x': pcs[ind, 0], 'y': pcs[ind, 1], 'label': [pop] * len(ind)}
        )
        renderer = p.scatter(
            'x', 'y',
            color=colors[(idx // len(markers)) % len(colors)],
            marker=markers[idx % len(markers)],
            source=source,
        )
        items.append((pop, [renderer]))

    # Split the entries into several legends of at most 30 items each, placed to
    # the right of the plot. Legends are built explicitly (no legend_label on
    # the glyphs), so no hidden duplicate legend ends up in the document.
    # Clicking an entry hides the corresponding glyphs.
    legend_size = 30
    for i in range(0, len(items), legend_size):
        legend = Legend(
            items=items[i:i + legend_size],
            location=(0, 10 + i / 6),
            click_policy="hide",
        )
        p.add_layout(legend, 'right')

    # Make sure the output directory exists before Bokeh tries to write into it.
    os.makedirs('htmls', exist_ok=True)
    output_file('htmls/' + name + '.html')

    show(p)
In [8]:
# PCA embedding
visualize(file_name='eurasia_2pcs.npy')
In [9]:
# t-SNE embeddings, one plot per saved `p` setting
# (presumably the perplexity -- confirm against the script that wrote the files).
for p in (10, 30, 50):
    visualize(file_name=f'eurasia_tsne_p{p}.npy')
In [10]:
# UMAP embeddings, one plot per saved `nn` setting
# (presumably n_neighbors, with `md` as min_dist -- confirm against the generating script).
for nn in (10, 15, 30):
    visualize(file_name=f'eurasia_umap_nn{nn}_md0.5.npy')
In [11]:
# PCA -> 15pcs -> UMAP: one plot per saved `nn` setting
for nn in (10, 15, 30):
    visualize(file_name=f'eurasia_15pcs-umap_nn{nn}_md0.5.npy')
In [7]:
# Vanilla Adversarial Autoencoder: one plot per (std, lr) result directory.
# lr values are kept as strings so they match the directory names exactly.
for std in (0.5, 1, 2):
    for lr in ('0.001', '0.0005', '0.0001'):
        name = f'eurasia_vanilla_lr{lr}_bs128_std{std}'
        visualize(file_name=f'../viz-autoencoder/{name}/res.npy', name=name)
In [5]:
# Adversarial Autoencoder with learnable cluster heads:
# one plot per (nc, std, lr) result directory.
for nc in (10, 20, 50):
    for std in (0.5, 1, 2):
        for lr in ('0.001', '0.0005', '0.0001'):
            name = f'eurasia_dim_{nc}_lr{lr}_bs32_std{std}'
            visualize(file_name=f'../viz-autoencoder/{name}/res.npy', name=name)
In [ ]:
# Adversarial Autoencoders with cluster heads as geographical locations of countries:
# one plot per (std, lr) result directory; nc is fixed to a single value here.
for nc in (20,):
    for std in (0.5, 1, 2):
        for lr in ('0.001', '0.0005', '0.0001'):
            name = f'eurasia_dim_{nc}_lr{lr}_bs32_std{std}_map'
            visualize(file_name=f'../viz-autoencoder/{name}/res.npy', name=name)
In [6]:
# Adversarial Autoencoders with cluster heads as geographical locations of countries
# + semi-supervised classification loss: one plot per (std, lr) result directory;
# nc is fixed to a single value here.
for nc in (20,):
    for std in (0.5, 1, 2):
        for lr in ('0.001', '0.0005', '0.0001'):
            name = f'eurasia_dim_{nc}_lr{lr}_bs32_std{std}_mapsemi'
            visualize(file_name=f'../viz-autoencoder/{name}/res.npy', name=name)
In [ ]: